// Copyright 2024 The Google Research Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package keypose;

// 4x4 transform matrix, stored in row-major order.
// The transform is cam_T_world: it maps world coordinates to camera
// coordinates.
message Transform {
  // Matrix entries, listed row by row. A 4x4 matrix has 16 entries; note that
  // the schema itself does not enforce the element count.
  repeated float element = 1;
}

// Camera parameters.
// NOTE(review): fx/fy and cx/cy are presumably the focal lengths and the
// principal point of the camera -- confirm against the code that fills them.
message Camera {
  optional float fx = 1;  // In pixels.
  optional float fy = 2;
  optional float cx = 3;
  optional float cy = 4;
  // NOTE(review): presumably the stereo baseline -- confirm.
  optional float baseline = 5;  // In meters.
  // Note that resx/resy are declared as float here, not as an integer type.
  optional float resx = 6;      // Image resolution in pixels.
  optional float resy = 7;
}

// A Target is associated with an image and bundles:
//   - the transform from world to camera, cam_T_world;
//   - the camera parameters;
//   - the keypoints in the image.
message Target {
  // A single keypoint (kp) in the image.
  message KeyPoint {
    // Image coordinates of the kp, in pixels.
    optional float u = 1;
    optional float v = 2;

    // Normalized image coordinates of the kp, in [0, 1].
    // These are NOT the 3D x,y of the point.
    optional float x = 3;
    optional float y = 4;

    // Distance of the kp from the camera, in meters.
    optional float z = 5;

    // 1.0 if the kp is onscreen. Reads as 1.0 when the field is unset.
    optional float visible = 6 [default = 1.0];
  }

  // Transform from world to camera coordinates, cam_T_world.
  optional Transform transform = 1;

  // Camera parameters.
  optional Camera camera = 2;

  // Keypoints in the image.
  repeated KeyPoint keypoints = 3;
}

// Top-level pb for stereo samples.
// Holds the keypoint target and the projection targets for an image.
message KeyTargets {
  // Keypoint target.
  optional Target kp_target = 1;

  // Projection targets.
  repeated Target proj_targets = 2;

  // Reads as false when the field is unset.
  // NOTE(review): presumably marks a mirrored sample -- confirm with callers.
  optional bool mirrored = 3 [default = false];
}

// Parameters of a dataset.
message DataParams {
  // TODO: resx, resy are redundant given the camera, which carries its own
  // resx/resy fields.
  optional int32 resx = 1;
  optional int32 resy = 2;

  // NOTE(review): presumably the number of keypoints -- confirm.
  optional int32 num_kp = 3;

  // Camera parameters for the dataset.
  optional Camera camera = 4;
}

// Describes the TFRecord sets used for training and evaluation.
message TfSet {
  optional string name = 1;  // Output will be in keypose/tfrecords/<name>.
  optional string common_dir = 2;  // Directory prefix common to all files,
                                   // e.g. keypose/data/bottle_0.
  repeated string train = 3;  // Training tfrecord dirs (e.g. */756x424_fl400).
  repeated string val = 4;    // Val tfrecord dirs (texture_5_*/756x424_fl400).
                              // Note: val dirs are excluded from train dirs.
  repeated string exclude = 5;     // Exclude these dirs from train/val.
  optional string image_size = 6;  // Resized image, e.g., 756x424_fl400.
                                   // fl400 means focal length 400 pixels.
}
